This sample Rmd file provides a walkthrough of the basics of using COMETS Analytics in R

You can visit the COMETS Analytics vignette at: https://github.com/CBIIT/R-cometsAnalytics/blob/master/RPackageSource/README.md

A sample input sheet file is publicly available at: https://github.com/CBIIT/R-cometsAnalytics/blob/master/InputRepository/cometsInputAge.xlsx

1. readCOMETSinput()

Read input sheet and perform integrity checks

# Retrieve the full path of the input data
dir <- system.file("extdata", package="RcometsAnalytics", mustWork=TRUE)
csvfile <- file.path(dir, "cometsInputAge.xlsx")

# Read in and process the input data
exmetabdata <- RcometsAnalytics::readCOMETSinput(csvfile)
## VarMap sheet is read in.
## Metabolites sheet is read in.
## SubjectMetabolites sheet is read in.
## SubjectData sheet is read in.
## Models sheet is read in.
## Model_Types sheet is read in.
## There are 16 categorical variables.
## Running Integrity Check...
## Joining with `by = join_by(hmdb_id)`
## Begin testing models in Models sheet... 
## Filtering subjects according to the rule(s) age< 70. 836 of 1000 are retained.
## Warning in runModel.addRemVars(rem.obj, vars[oneVal], vars.type, "too few
## unique non-missing values", : The variable(s) female, fasted have been removed
## from adjvars because of: too few unique non-missing values
## Warning in runModel.addRemVars(rem.obj, tmp[rem], varSet, "correlated with
## another predictor", : The variable(s) multivitamin.2 have been removed from
## adjvars because of: correlated with another predictor
## Finished testing models in Models sheet.

Plot the variance distribution of the metabolite values

RcometsAnalytics::plotVar(exmetabdata,titlesize=12)
## Warning: The titlefont attribute is deprecated. Use title = list(font = ...)
## instead.

Plot the distribution of the number of samples with minimum values for each metabolite

# Note: for each metabolite, the minimum value is the smallest value observed across all samples (ignoring missing values)

RcometsAnalytics::plotMinvalues(exmetabdata,titlesize=12)
## Warning: The titlefont attribute is deprecated. Use title = list(font = ...)
## instead.

2. getModelData()

Set up pre-specified models based on input sheet

# set up "1 Age" model as specified in the input sheet
exmodeldata <- RcometsAnalytics::getModelData(exmetabdata,modlabel="1 Age") 

Set up a model in interactive mode by specifying it directly in R

# set up metabolites ~ age model using interactive mode
exmodeldata2 <- RcometsAnalytics::getModelData(exmetabdata, modelspec="Interactive",
    exposures=c("age"))

3. runModel()

Run pre-specified models based on input sheet

excorrdata <- RcometsAnalytics::runModel(exmodeldata, exmetabdata, "test_cohort")
RcometsAnalytics::OutputListToExcel(filename="test_cohort_corr1.xlsx", excorrdata)
## Output saved to file: test_cohort_corr1.xlsx
## [1] "test_cohort_corr1.xlsx"
RcometsAnalytics::showModel(excorrdata,nlines=3)
## 
## ModelSummary:
##   run                   outcomespec exposurespec term nobs message adjvars
## 1   1 _1_2_3_benzenetriol_sulfate_2          age      1000                
## 2   2      _1_2_dipalmitoylglycerol          age      1000                
## 3   3              _1_2_propanediol          age      1000                
##   adjvars.removed adjspec   outcome_uid                        outcome
## 1                         CHEM100006374 1,2,3-benzenetriol sulfate (2)
## 2                             HMDB07098              DG(16:0/16:0/0:0)
## 3                             HMDB01881               Propylene glycol
##   exposure_uid      exposure adj_uid                metabolite_name
## 1          age Age at Entry          1,2,3-benzenetriol sulfate (2)
## 2          age Age at Entry                 1,2-dipalmitoylglycerol
## 3          age Age at Entry                         1,2-propanediol
## 
## Effects:
##   run                   outcomespec exposurespec term     estimate    pvalue
## 1   1 _1_2_3_benzenetriol_sulfate_2          age  age  0.019631700 0.5351971
## 2   2      _1_2_dipalmitoylglycerol          age  age -0.002059914 0.9481272
## 3   3              _1_2_propanediol          age  age -0.018211498 0.5651388
##   pvalue.adj                metabolite_name
## 1  0.6742380 1,2,3-benzenetriol sulfate (2)
## 2  0.9752621        1,2-dipalmitoylglycerol
## 3  0.7032582                1,2-propanediol
## 
## Errors_Warnings:
## [1] type    object  message
## <0 rows> (or 0-length row.names)
## 
## Table1:
##   variable in.model       type    n n.unique min quartile1 median   mean
## 1      age exposure continuous 1000       20  55        59     63 63.208
##   quartile3 max n.missing
## 1        67  74         0
## 
## Info:
##                       name                     value
## 1                     date 2024-05-08 11:28:14.90342
## 2                   cohort               test_cohort
## 3 RcometsAnalytics version                   3.0.0.0
## NULL

Run interactive model

excorrdata2 <- RcometsAnalytics::runModel(exmodeldata2, exmetabdata, "test_cohort")
RcometsAnalytics::OutputListToExcel(filename="test_cohort_corr2.xlsx", excorrdata2)
## Output saved to file: test_cohort_corr2.xlsx
## [1] "test_cohort_corr2.xlsx"
RcometsAnalytics::showModel(excorrdata2,nlines=3)
## 
## ModelSummary:
##   run                   outcomespec exposurespec term nobs message adjvars
## 1   1 _1_2_3_benzenetriol_sulfate_2          age      1000                
## 2   2      _1_2_dipalmitoylglycerol          age      1000                
## 3   3              _1_2_propanediol          age      1000                
##   adjvars.removed adjspec   outcome_uid                        outcome
## 1                         CHEM100006374 1,2,3-benzenetriol sulfate (2)
## 2                             HMDB07098              DG(16:0/16:0/0:0)
## 3                             HMDB01881               Propylene glycol
##   exposure_uid      exposure adj_uid                metabolite_name
## 1          age Age at Entry          1,2,3-benzenetriol sulfate (2)
## 2          age Age at Entry                 1,2-dipalmitoylglycerol
## 3          age Age at Entry                         1,2-propanediol
## 
## Effects:
##   run                   outcomespec exposurespec term     estimate    pvalue
## 1   1 _1_2_3_benzenetriol_sulfate_2          age  age  0.019631700 0.5351971
## 2   2      _1_2_dipalmitoylglycerol          age  age -0.002059914 0.9481272
## 3   3              _1_2_propanediol          age  age -0.018211498 0.5651388
##   pvalue.adj                metabolite_name
## 1  0.6742380 1,2,3-benzenetriol sulfate (2)
## 2  0.9752621        1,2-dipalmitoylglycerol
## 3  0.7032582                1,2-propanediol
## 
## Errors_Warnings:
## [1] type    object  message
## <0 rows> (or 0-length row.names)
## 
## Table1:
##   variable in.model       type    n n.unique min quartile1 median   mean
## 1      age exposure continuous 1000       20  55        59     63 63.208
##   quartile3 max n.missing
## 1        67  74         0
## 
## Info:
##                       name                      value
## 1                     date 2024-05-08 11:28:15.674194
## 2                   cohort                test_cohort
## 3 RcometsAnalytics version                    3.0.0.0
## NULL

4. Example of another analysis

Set up and run stratified correlation analysis

exmodeldata2 <- RcometsAnalytics::getModelData(exmetabdata,modelspec="Interactive",
                   outcomes=c("lactose","lactate"),
                exposures=c("age","bmi_grp"),strvars="race_grp")

excorrdata2  <- RcometsAnalytics::runModel(exmodeldata2,exmetabdata,"test_cohort")

RcometsAnalytics::showModel(excorrdata2,nlines=3)
## 
## ModelSummary:
##   run outcomespec exposurespec term nobs message adjvars adjvars.removed
## 1   1     lactose          age       912                                
## 2   2     lactate          age       912                                
## 3   3     lactose      bmi_grp       912                                
##   adjspec outcome_uid       outcome exposure_uid      exposure adj_uid
## 1           HMDB00186 Alpha-Lactose          age Age at Entry         
## 2           HMDB00190 L-Lactic acid          age Age at Entry         
## 3           HMDB00186 Alpha-Lactose      bmi_grp       bmi_grp        
##   stratavar strata metabolite_name
## 1  race_grp      0         lactose
## 2  race_grp      0         lactate
## 3  race_grp      0         lactose
## 
## Effects:
##   run outcomespec exposurespec      term    estimate       pvalue stratavar
## 1   1     lactose          age       age  0.11574078 0.0004612988  race_grp
## 2   2     lactate          age       age -0.03393180 0.3060207518  race_grp
## 3   3     lactose      bmi_grp bmi_grp.2 -0.01503931 0.6504904870  race_grp
##   strata metabolite_name
## 1      0         lactose
## 2      0         lactate
## 3      0         lactose
## 
## Errors_Warnings:
##    type object                          message stratavar strata
## 1 ERROR        Stratum contains to few subjects  race_grp      3
## 
## Table1:
##   stratavar strata variable in.model        type category   n n.unique min
## 1  race_grp      0      age exposure  continuous          912       20  55
## 2  race_grp      0  bmi_grp exposure categorical        1 325       NA  NA
## 3  race_grp      0                                      2 335       NA  NA
##   quartile1 median     mean quartile3 max n.missing
## 1        59     63 63.27193        67  74         0
## 2        NA     NA       NA        NA  NA        NA
## 3        NA     NA       NA        NA  NA        NA
## 
## Info:
##                       name                      value
## 1                     date 2024-05-08 11:28:16.119203
## 2                   cohort                test_cohort
## 3 RcometsAnalytics version                    3.0.0.0
## NULL

5. Example of super-batch mode

Use super-batch mode to run all models as specified in the input sheet

exallmodels <- RcometsAnalytics::runAllModels(exmetabdata, cohortLabel = "test_cohort") # whatever cohortLabel you choose will be used in autosaved files of results
## Running 1 Age
## Output saved to file: 1_Age__test_cohort__2024-05-08.xlsx
## Running 2 Age Spearman Min 10 Subjects
## Output saved to file: 2_Age_Spearman_Min_10_Subjects__test_cohort__2024-05-08.xlsx
## Running 3 Age Multivariable adjusted
## Output saved to file: 3_Age_Multivariable_adjusted__test_cohort__2024-05-08.xlsx
## Running 4 Age Multivariable adjusted stratified
## Warning in runModel.addRemVars(rem.obj, tmp[rem], varSet, "linearly dependent",
## : The variable(s) race_grp.2 have been removed from adjvars because of:
## linearly dependent
## Warning in runModel.addRemVars(rem.obj, tmp[rem], varSet, "linearly dependent",
## : The variable(s) race_grp.2 have been removed from adjvars because of:
## linearly dependent
## Output saved to file: 4_Age_Multivariable_adjusted_stratified__test_cohort__2024-05-08.xlsx
## Running 5 Age Multivariable adjusted stratified subset
## Filtering subjects according to the rule(s) age< 70. 836 of 1000 are retained.
## Warning in runModel.addRemVars(rem.obj, vars[oneVal], vars.type, "too few
## unique non-missing values", : The variable(s) female, fasted have been removed
## from adjvars because of: too few unique non-missing values
## Warning in runModel.addRemVars(rem.obj, tmp[rem], varSet, "linearly dependent",
## : The variable(s) race_grp.2, multivitamin.2 have been removed from adjvars
## because of: linearly dependent
## Warning in runModel.addRemVars(rem.obj, vars[oneVal], vars.type, "too few
## unique non-missing values", : The variable(s) female, fasted have been removed
## from adjvars because of: too few unique non-missing values
## Warning in runModel.addRemVars(rem.obj, tmp[rem], varSet, "correlated with
## another predictor", : The variable(s) multivitamin.2 have been removed from
## adjvars because of: correlated with another predictor
## Warning in runModel.addRemVars(rem.obj, vars[oneVal], vars.type, "too few
## unique non-missing values", : The variable(s) female, fasted have been removed
## from adjvars because of: too few unique non-missing values
## Warning in runModel.addRemVars(rem.obj, tmp[rem], varSet, "linearly dependent",
## : The variable(s) race_grp.2, multivitamin.2 have been removed from adjvars
## because of: linearly dependent
## Output saved to file: 5_Age_Multivariable_adjusted_stratified_subset__test_cohort__2024-05-08.xlsx
## Running 6 All pairwise metabolites
## Output saved to file: 6_All_pairwise_metabolites__test_cohort__2024-05-08.xlsx
## Running 7 Poisson regression
## Output saved to file: 7_Poisson_regression__test_cohort__2024-05-08.xlsx
## Running 8 Logistic regression
## Output saved to file: 8_Logistic_regression__test_cohort__2024-05-08.xlsx
## Running 9 Survival model
## Output saved to file: 9_Survival_model__test_cohort__2024-05-08.xlsx
## Running 10 Conditional logistic
## Output saved to file: 10_Conditional_logistic__test_cohort__2024-05-08.xlsx
RcometsAnalytics::showModel(exallmodels,nlines=3)
## NULL